import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Dataset: load the customer table and keep only the two feature columns
# (positional columns 2 and 4) that the clustering runs on.
data_df = pd.read_csv('data/customers.csv')
x = data_df.iloc[:, [2, 4]]
print(x)

# Model training and prediction: fit K-Means and obtain one cluster label
# per row of `x`.
km = KMeans(n_clusters=4)
y_kmeans = km.fit_predict(x)
print(y_kmeans)

# Visualization: draw one scatter series per cluster, then the centroids.
# The loop is driven by km.n_clusters so the plot always matches the model;
# the previous hand-written calls also plotted label 4, which a 4-cluster
# model never produces.
points = x.values
for label in range(km.n_clusters):
    members = y_kmeans == label
    plt.scatter(points[members, 0], points[members, 1])
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1])
plt.show()

